{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark,random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 随机生成1000个字母"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ls = [chr(random.randint(65,72)) for i in range(1000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['D', 'A', 'C', 'D', 'E', 'G', 'E', 'E', 'D', 'D', 'E', 'F', 'G', 'E', 'E', 'F', 'B', 'F', 'E', 'F']\n"
     ]
    }
   ],
   "source": [
    "print(ls[:20])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc = pyspark.SparkContext()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "rdd = sc.parallelize(ls)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MAP操作，每个字母计数为一"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "maprdd = rdd.map(lambda x: (x ,1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('D', 1), ('A', 1), ('C', 1), ('D', 1), ('E', 1), ('G', 1), ('E', 1), ('E', 1), ('D', 1), ('D', 1)]\n"
     ]
    }
   ],
   "source": [
    "print(maprdd.take(10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ReduceByKey操作，同key的数据之间做计算。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "reduceRes = maprdd.reduceByKey(lambda x,y : (x+y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('B', 124),\n",
       " ('H', 118),\n",
       " ('C', 139),\n",
       " ('A', 130),\n",
       " ('F', 132),\n",
       " ('D', 121),\n",
       " ('E', 128),\n",
       " ('G', 108)]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reduceRes.collect()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
